HSCP - 9-digit code for Health and Social Care Partnerships (2016) of residence
HB - 9-digit code for health board of treatment based on boundaries as at 1st April 2019
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 4.1.3
## here() starts at C:/Users/mahri/OneDrive/CodeClan/rshiny_dashboard_project/Work In Progress/Demographics
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
Read In Data
# Load the health-board hospital admissions extract split by age group and sex.
covid_admissions_HB_agesex <-
  here("../../raw_data/covid_data/hospital_admissions_hb_agesex_20220302.csv") %>%
  read_csv()
## Rows: 43516 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (8): HB, HBQF, AgeGroup, AgeGroupQF, Sex, SexQF, AdmissionType, Admissio...
## dbl (4): WeekEnding, NumberAdmissions, Average20182019, PercentVariation
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the health-board hospital admissions extract split by SIMD quintile.
covid_admissions_HB_simd <-
  here("../../raw_data/covid_data/hospital_admissions_hb_simd_20220302.csv") %>%
  read_csv()
## Rows: 21138 Columns: 9
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (4): HB, HBQF, AdmissionType, AdmissionTypeQF
## dbl (5): WeekEnding, SIMDQuintile, NumberAdmissions, Average20182019, Percen...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Clean Names
# Standardise the column names of both admissions tables with janitor.
covid_admissions_HB_agesex <- covid_admissions_HB_agesex %>%
  janitor::clean_names()
covid_admissions_HB_simd <- covid_admissions_HB_simd %>%
  janitor::clean_names()
Convert the week_ending column to a date, then create new day, month and year columns plus a combined month-and-year column
# Health board x age/sex: parse week_ending as a date, derive day / month name /
# year columns, and build a combined "<month> <year>" label while keeping the
# separate month and year columns.
covid_admissions_HB_agesex <- covid_admissions_HB_agesex %>%
  mutate(
    week_ending = ymd(week_ending),
    # mutate() evaluates in order, so the parsed date is used from here on
    wk_ending_day = day(week_ending),
    wk_ending_month = month(week_ending, label = TRUE, abbr = FALSE),
    wk_ending_year = year(week_ending)
  ) %>%
  unite(
    col = wk_ending_yr_month,
    wk_ending_month, wk_ending_year,
    sep = " ",
    remove = FALSE
  )
covid_admissions_HB_agesex
# Health board x SIMD: parse week_ending as a date, derive day / month name /
# year columns, and build a combined "<month> <year>" label while keeping the
# separate month and year columns.
covid_admissions_HB_simd <- covid_admissions_HB_simd %>%
  mutate(
    week_ending = ymd(week_ending),
    # mutate() evaluates in order, so the parsed date is used from here on
    wk_ending_day = day(week_ending),
    wk_ending_month = month(week_ending, label = TRUE, abbr = FALSE),
    wk_ending_year = year(week_ending)
  ) %>%
  unite(
    col = wk_ending_yr_month,
    wk_ending_month, wk_ending_year,
    sep = " ",
    remove = FALSE
  )
covid_admissions_HB_simd
Looking to see if I can join the data (probably). All the HSCP corresponding health board values are S08000015 - S08000032: https://www.opendata.nhs.scot/dataset/geography-codes-and-labels/resource/944765d7-d0d9-46a0-b377-abb3de51d08e
The HB data set is all S08000015 - S08000032
# List the distinct health board codes present in the age/sex table.
distinct(covid_admissions_HB_agesex, hb)
We only need ACUTE patients:
admission_type has: All, Emergency and Planned * all and emergency always have similar figures compared with planned.
# Row counts per category, to see which values each column takes.
# Age/sex table: rows per admission type
covid_admissions_HB_agesex %>%
  count(admission_type, name = "total")
# Age/sex table: rows per age group
covid_admissions_HB_agesex %>%
  count(age_group, name = "total")
# SIMD table: rows per admission type
covid_admissions_HB_simd %>%
  count(admission_type, name = "total")
# SIMD table: rows per SIMD quintile
covid_admissions_HB_simd %>%
  count(simd_quintile, name = "total")
# Keep only the rows whose admission type is "Emergency" in both tables.
covid_admissions_HB_agesex <- filter(
  covid_admissions_HB_agesex,
  admission_type == "Emergency"
)
covid_admissions_HB_simd <- filter(
  covid_admissions_HB_simd,
  admission_type == "Emergency"
)
And we don’t need ALL ages either… edit: yes we do: * The “sex” column has “Male”, “Female” and “All” - Male & Female only appear under “All ages”!
# Age-specific rows only: drop the "All ages" aggregate rows.
covid_admissions_HB_AGE <- filter(
  covid_admissions_HB_agesex,
  age_group != "All ages"
)
Firstly need a total column for each age group per month:
# List the distinct age group labels present in the age/sex table.
distinct(covid_admissions_HB_agesex, age_group)
# Under 5: attach the per-month sum of number_admissions to every row
monthly_covid_ads_HB_under5 <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, age_group == "Under 5"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# The table above is still grouped by month, so the per-month sum of the
# 2018-2019 average column is added under that same grouping.
monthly_past_average_HB_under5 <- mutate(
  monthly_covid_ads_HB_under5,
  total_past_average_ads_per_month = sum(average20182019)
)
monthly_past_average_HB_under5
# 5 - 14: attach the per-month sum of number_admissions to every row
monthly_covid_ads_HB_5to14 <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, age_group == "5 - 14"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# The table above is still grouped by month, so the per-month sum of the
# 2018-2019 average column is added under that same grouping.
monthly_past_average_HB_5to14 <- mutate(
  monthly_covid_ads_HB_5to14,
  total_past_average_ads_per_month = sum(average20182019)
)
monthly_past_average_HB_5to14
# 15 - 44: attach the per-month sum of number_admissions to every row
monthly_covid_ads_HB_15to44 <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, age_group == "15 - 44"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# The table above is still grouped by month, so the per-month sum of the
# 2018-2019 average column is added under that same grouping.
monthly_past_average_HB_15to44 <- mutate(
  monthly_covid_ads_HB_15to44,
  total_past_average_ads_per_month = sum(average20182019)
)
monthly_past_average_HB_15to44
# 45 - 64: attach the per-month sum of number_admissions to every row
a <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, age_group == "45 - 64"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# `a` is still grouped by month, so the per-month sum of the 2018-2019 average
# column is added under that same grouping.
a2 <- mutate(a, total_past_average_ads_per_month = sum(average20182019))
a2
# 65 - 74: attach the per-month sum of number_admissions to every row
b <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, age_group == "65 - 74"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# `b` is still grouped by month, so the per-month sum of the 2018-2019 average
# column is added under that same grouping.
b2 <- mutate(b, total_past_average_ads_per_month = sum(average20182019))
b2
# 75 - 84: attach the per-month sum of number_admissions to every row
# NOTE(review): `c` shares its name with base::c(). Calls such as c(...) still
# resolve to the function, but a descriptive name would be clearer; the name is
# kept here because `c2` below is built from it.
c <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, age_group == "75 - 84"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# `c` is still grouped by month, so the per-month sum of the 2018-2019 average
# column is added under that same grouping.
c2 <- mutate(c, total_past_average_ads_per_month = sum(average20182019))
c2
# 85 and over: attach the per-month sum of number_admissions to every row
d <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, age_group == "85 and over"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# `d` is still grouped by month, so the per-month sum of the 2018-2019 average
# column is added under that same grouping.
d2 <- mutate(d, total_past_average_ads_per_month = sum(average20182019))
d2
bind together so we have totals for each month:
# Stack the seven per-age-group tables (youngest first) into one table that
# carries both monthly total columns.
covid_admissions_HB_age_totals <- bind_rows(
  list(
    monthly_past_average_HB_under5,
    monthly_past_average_HB_5to14,
    monthly_past_average_HB_15to44,
    a2,
    b2,
    c2,
    d2
  )
)
covid_admissions_HB_age_totals
April 2020 will be shown first and ages are a mess. So:
# Chronological "<month> <year>" labels, used as x-axis limits in the plots.
# All months from January 2020 through February 2022 (26 labels).
dates <- head(
  paste(rep(month.name, times = 3), rep(2020:2022, each = 12)),
  26
)
# April to September of 2020 and of 2021.
summer_dates <- paste(
  rep(month.name[4:9], times = 2),
  rep(2020:2021, each = 6)
)
# Every remaining label in `dates`, in chronological order.
winter_dates <- dates[!(dates %in% summer_dates)]
PLOT OF Total COVID admissions per month by age
group
# Line-and-point chart of total monthly admissions, one coloured series per
# age group, with months placed in chronological order via `dates`.
covid_admissions_HB_age_totals %>%
  mutate(
    # order the legend from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot(
    mapping = aes(
      x = wk_ending_yr_month,
      y = total_admissions_per_month,
      group = age_group,
      colour = age_group
    )
  ) +
  geom_point() +
  geom_line() +
  scale_x_discrete(limits = dates) +
  labs(
    title = "HB Total Admissions per month by age group",
    subtitle = "January, 2020 - February 2022",
    x = "Month",
    y = "Total admissions",
    colour = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
let’s try again to see if there’s difference in age groups and seasons: * Winter = Q4 and Q1 * Summer = Q2 and Q3
# Winter subset: rows whose month-year label names October through March.
highlight_winter_HB_agesex <- filter(
  covid_admissions_HB_age_totals,
  str_detect(
    wk_ending_yr_month,
    "October|November|December|January|February|March"
  )
)
# Summer subset: rows whose month-year label names April through September.
highlight_summer_HB_agesex <- filter(
  covid_admissions_HB_age_totals,
  str_detect(
    wk_ending_yr_month,
    "April|May|June|July|August|September"
  )
)
Re-trying above graph - messy but can be looked into. * Total COVID admissions per month by age group
# Faceted bar chart of total monthly admissions, one panel per age group.
# covid_admissions_HB_age_totals repeats the monthly total on every underlying
# row, and geom_col() stacks every row it receives, so plotting the table
# directly makes each bar a multiple of the monthly total. The data is first
# reduced to one row per age group and month so bar height equals the total.
covid_admissions_HB_age_totals %>%
  ungroup() %>%
  distinct(age_group, wk_ending_yr_month, total_admissions_per_month) %>%
  mutate(
    # order the panels from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot() +
  aes(
    x = wk_ending_yr_month,
    y = total_admissions_per_month,
    group = age_group,
    fill = age_group
  ) +
  scale_x_discrete(limits = dates) +
  geom_col() +
  facet_wrap(~age_group) +
  labs(
    x = "Month",
    y = "Total admissions",
    title = "HB Total admissions per month by age group",
    subtitle = "January, 2020 - February 2022",
    fill = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
THE ABOVE BUT WINTER vs SUMMER
# Faceted bar chart of total monthly admissions for the summer months, one
# panel per age group.
# highlight_summer_HB_agesex repeats the monthly total on every underlying row,
# and geom_col() stacks every row it receives, so plotting the table directly
# makes each bar a multiple of the monthly total. The data is first reduced to
# one row per age group and month so bar height equals the total.
highlight_summer_HB_agesex %>%
  ungroup() %>%
  distinct(age_group, wk_ending_yr_month, total_admissions_per_month) %>%
  mutate(
    # order the panels from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot() +
  aes(
    x = wk_ending_yr_month,
    y = total_admissions_per_month,
    group = age_group,
    fill = age_group
  ) +
  scale_x_discrete(limits = summer_dates) +
  geom_col() +
  facet_wrap(~age_group) +
  labs(
    x = "Month",
    y = "Total admissions",
    title = "HB Total admissions per month by age group",
    subtitle = "Summer Months:
April, 2020 - September 2020
and April, 2021 - September 2021",
    fill = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
# Faceted bar chart of total monthly admissions for the winter months, one
# panel per age group.
# highlight_winter_HB_agesex repeats the monthly total on every underlying row,
# and geom_col() stacks every row it receives, so plotting the table directly
# makes each bar a multiple of the monthly total. The data is first reduced to
# one row per age group and month so bar height equals the total.
highlight_winter_HB_agesex %>%
  ungroup() %>%
  distinct(age_group, wk_ending_yr_month, total_admissions_per_month) %>%
  mutate(
    # order the panels from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot() +
  aes(
    x = wk_ending_yr_month,
    y = total_admissions_per_month,
    group = age_group,
    fill = age_group
  ) +
  scale_x_discrete(limits = winter_dates) +
  geom_col() +
  facet_wrap(~age_group) +
  labs(
    x = "Month",
    y = "Total admissions",
    title = "HB Total admissions per month by age group",
    subtitle = "Winter Months:
January, 2020 - March 2020;
October 2020 - March 2021;
October 2021 - February 2022",
    fill = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
# Scatter of each row's number_admissions against its average20182019 value,
# coloured by age group. No grouping step is needed before plotting because
# ggplot takes its grouping from the aesthetics.
covid_admissions_HB_age_totals %>%
  mutate(
    # order the legend from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot(
    mapping = aes(
      x = number_admissions,
      y = average20182019,
      colour = age_group
    )
  ) +
  geom_point() +
  labs(
    title = "HB Weekly admissions per age group against the average weekly
admissions in previous years",
    subtitle = "COVID: January, 2020 - February, 2022 /
Previous years: 2018 - 2019",
    x = "Weekly number of admissions",
    y = "Average weekly admissions to hospital in 2018-2019",
    colour = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
The above again just looking to see if there are differences in age groups * Weekly admissions per age group against the average weekly admissions in previous years (2018&19)
# Scatter of each row's number_admissions against its average20182019 value,
# one panel per age group. No grouping step is needed before plotting because
# ggplot takes its grouping from the aesthetics and facets.
covid_admissions_HB_AGE %>%
  mutate(
    # order the panels from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot(
    mapping = aes(
      x = number_admissions,
      y = average20182019,
      colour = age_group
    )
  ) +
  geom_point() +
  facet_wrap(~age_group) +
  labs(
    title = "HB Weekly admissions per age group against the average weekly
admissions in previous years",
    subtitle = "COVID: January, 2020 - February, 2022 /
Previous years: 2018 - 2019",
    x = "Weekly number of admissions",
    y = "Average weekly admissions to hospital in 2018-2019",
    colour = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
The above but by month instead of EVERY input
# Scatter of the monthly admissions total against the monthly sum of the
# 2018-2019 average column, coloured by age group. No grouping step is needed
# before plotting because ggplot takes its grouping from the aesthetics.
covid_admissions_HB_age_totals %>%
  mutate(
    # order the legend from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot(
    mapping = aes(
      x = total_admissions_per_month,
      y = total_past_average_ads_per_month,
      colour = age_group
    )
  ) +
  geom_point() +
  labs(
    title = "HB Monthly admissions per age group against the average weekly
admissions in previous years",
    subtitle = "COVID: January, 2020 - February, 2022 /
Previous years: 2018 - 2019",
    x = "Monthly number of admissions",
    y = "Average monthly admissions to hospital in 2018-2019",
    colour = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
facet by age group
# Scatter of the monthly admissions total against the monthly sum of the
# 2018-2019 average column, one panel per age group. No grouping step is needed
# before plotting because ggplot takes its grouping from the aesthetics and
# facets.
covid_admissions_HB_age_totals %>%
  mutate(
    # order the panels from youngest to oldest
    age_group = fct_relevel(
      age_group,
      "Under 5", "5 - 14", "15 - 44", "45 - 64",
      "65 - 74", "75 - 84", "85 and over"
    )
  ) %>%
  ggplot(
    mapping = aes(
      x = total_admissions_per_month,
      y = total_past_average_ads_per_month,
      colour = age_group
    )
  ) +
  geom_point() +
  facet_wrap(~age_group) +
  labs(
    title = "HB Monthly admissions per age group against the average weekly
admissions in previous years",
    subtitle = "COVID: January, 2020 - February, 2022 /
Previous years: 2018 - 2019",
    x = "Monthly number of admissions",
    y = "Average monthly admissions to hospital in 2018-2019",
    colour = "Age Group"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
remember that the agesex df has male, female, and all - male and female come under “All ages” but not specific age groups
Creating columns for monthly totals - admissions, and average past ads
FOR FEMALES (males below):
# Optional checks of the available sex values (left disabled):
# covid_admissions_HB_agesex %>%
# distinct(sex)
# view(covid_admissions_HB_agesex)

# Female rows: attach the per-month sum of number_admissions to every row
monthly_covid_ads_HB_agesex_female <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, sex == "Female"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# The table above is still grouped by month, so the per-month sum of the
# 2018-2019 average column is added under that same grouping.
monthly_past_average_HB_agesex_female <- mutate(
  monthly_covid_ads_HB_agesex_female,
  total_past_average_ads_per_month = sum(average20182019)
)
monthly_past_average_HB_agesex_female
# Optional checks of the available sex values (left disabled):
# covid_admissions_HB_agesex %>%
# distinct(sex)
# view(covid_admissions_HB_agesex)

# Male rows: attach the per-month sum of number_admissions to every row
monthly_covid_ads_HB_agesex_male <- mutate(
  group_by(
    filter(covid_admissions_HB_agesex, sex == "Male"),
    wk_ending_yr_month
  ),
  total_admissions_per_month = sum(number_admissions)
)
# The table above is still grouped by month, so the per-month sum of the
# 2018-2019 average column is added under that same grouping.
monthly_past_average_HB_agesex_male <- mutate(
  monthly_covid_ads_HB_agesex_male,
  total_past_average_ads_per_month = sum(average20182019)
)
monthly_past_average_HB_agesex_male
bringing male and females with totals per month together:
# Stack the male and female monthly-total tables (male rows first).
covid_admissions_HB_sex_totals <- bind_rows(
  list(
    monthly_past_average_HB_agesex_male,
    monthly_past_average_HB_agesex_female
  )
)
covid_admissions_HB_sex_totals
Total admissions across HBs by gender for week ending (this doesn’t work):
# Monthly admissions total (x) against the monthly sum of the 2018-2019 average
# column (y), coloured by sex.
# Both axes are numeric, so the discrete month-label scale that used to be
# applied to x has been removed: it labelled the axis with month names that have
# no relation to the plotted values. The group_by() before ggplot() was also
# dropped because ggplot takes its grouping from the aesthetics.
# NOTE(review): the title is a placeholder and there are no axis labels yet.
covid_admissions_HB_sex_totals %>%
  ggplot() +
  aes(
    x = total_admissions_per_month,
    y = total_past_average_ads_per_month,
    colour = sex
  ) +
  geom_point() +
  geom_line() +
  labs(title = "I AM AWARE THIS IS AWFUL") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
Trying again - Total admissions across HBs by gender for week ending
# Side-by-side (dodged) bars of total monthly admissions, filled by sex, with
# months placed in chronological order via `dates`.
covid_admissions_HB_sex_totals %>%
  ggplot(
    mapping = aes(
      x = wk_ending_yr_month,
      y = total_admissions_per_month,
      fill = sex
    )
  ) +
  geom_col(position = "dodge") +
  scale_x_discrete(limits = dates) +
  labs(
    title = "Total Admissions in COVID Times per Month by Sex",
    x = "Month and Year",
    y = "Total Admissions across Health Boards per Month",
    fill = "Sex"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
Monthly admissions by sex (line plot)
# Line-and-point chart of total monthly admissions, one series per sex, with
# months placed in chronological order via `dates`. No grouping step is needed
# before plotting because the `group` aesthetic defines the series.
# NOTE(review): the original author could not see the line in the output; the
# cause is not evident from the code here.
covid_admissions_HB_sex_totals %>%
  ggplot(
    mapping = aes(
      x = wk_ending_yr_month,
      y = total_admissions_per_month,
      colour = sex,
      group = sex
    )
  ) +
  geom_point() +
  geom_line() +
  scale_x_discrete(limits = dates) +
  labs(
    title = "Monthly admissions in COVID times by sex",
    subtitle = "January, 2020 - February, 2022",
    x = "Month and Year",
    y = "Total Admissions",
    colour = "Sex"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
# Scatter of each row's number_admissions against its average20182019 value,
# coloured by SIMD quintile.
# Fixes relative to the earlier version:
# * the discrete month-label x scale was removed, because x is the numeric
#   number_admissions column and the month labels did not describe it;
# * the labels now say "Weekly", matching how the same two columns are labelled
#   in the age-group plots (the monthly totals are only built further down);
# * the group_by() before ggplot() was dropped, since ggplot ignores it.
covid_admissions_HB_simd %>%
  ggplot() +
  aes(
    x = number_admissions,
    y = average20182019,
    colour = simd_quintile
  ) +
  geom_point() +
  labs(
    x = "Weekly Number of Admissions in COVID Times",
    y = "Average Number of Admissions in 2018/2019",
    title = "Weekly number of admissions in COVID Times against the average
of the same week in 2018/2019 for SIMD Levels",
    colour = "SIMD Level:
1 = Most Deprived
5 = Least Deprived"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
So, creating monthly total columns for each level of simd:
# SIMD quintile 1: attach both per-month sums (admissions and the 2018-2019
# average column) to every row under a single month grouping.
simd1 <- covid_admissions_HB_simd %>%
  filter(simd_quintile == "1") %>%
  group_by(wk_ending_yr_month) %>%
  mutate(
    total_admissions_per_month_simd = sum(number_admissions),
    total_past_average_ads_per_month = sum(average20182019)
  )
simd1
# SIMD quintile 2: attach both per-month sums (admissions and the 2018-2019
# average column) to every row under a single month grouping.
simd2 <- covid_admissions_HB_simd %>%
  filter(simd_quintile == "2") %>%
  group_by(wk_ending_yr_month) %>%
  mutate(
    total_admissions_per_month_simd = sum(number_admissions),
    total_past_average_ads_per_month = sum(average20182019)
  )
simd2
# SIMD quintile 3: attach both per-month sums (admissions and the 2018-2019
# average column) to every row under a single month grouping.
simd3 <- covid_admissions_HB_simd %>%
  filter(simd_quintile == "3") %>%
  group_by(wk_ending_yr_month) %>%
  mutate(
    total_admissions_per_month_simd = sum(number_admissions),
    total_past_average_ads_per_month = sum(average20182019)
  )
simd3
# SIMD quintile 4: attach both per-month sums (admissions and the 2018-2019
# average column) to every row under a single month grouping.
simd4 <- covid_admissions_HB_simd %>%
  filter(simd_quintile == "4") %>%
  group_by(wk_ending_yr_month) %>%
  mutate(
    total_admissions_per_month_simd = sum(number_admissions),
    total_past_average_ads_per_month = sum(average20182019)
  )
simd4
# SIMD quintile 5: attach both per-month sums (admissions and the 2018-2019
# average column) to every row under a single month grouping.
simd5 <- covid_admissions_HB_simd %>%
  filter(simd_quintile == "5") %>%
  group_by(wk_ending_yr_month) %>%
  mutate(
    total_admissions_per_month_simd = sum(number_admissions),
    total_past_average_ads_per_month = sum(average20182019)
  )
simd5
bind these together:
# Stack the five per-quintile tables (quintile 1 first) into one table.
covid_admissions_HB_simd_totals <- bind_rows(
  list(simd1, simd2, simd3, simd4, simd5)
)
covid_admissions_HB_simd_totals
PLOT total admissions per simd per month
# Line-and-point chart of total monthly admissions, one coloured series per
# SIMD quintile, with months placed in chronological order via `dates`.
covid_admissions_HB_simd_totals %>%
  mutate(
    # The level names belong to fct_relevel(); previously a misplaced
    # parenthesis passed them to as.character(), which ignored them, so the
    # requested level order was never actually applied.
    simd_quintile = fct_relevel(
      as.character(simd_quintile),
      "1", "2", "3", "4", "5"
    )
  ) %>%
  ggplot() +
  aes(
    x = wk_ending_yr_month,
    y = total_admissions_per_month_simd,
    group = simd_quintile,
    colour = simd_quintile
  ) +
  scale_x_discrete(limits = dates) +
  geom_point() +
  geom_line() +
  labs(
    x = "Month and Year",
    y = "Total admissions",
    title = "Total admissions in COVID times per month by SIMD Level",
    subtitle = "January, 2020 - February 2022",
    colour = "SIMD Level:
1 = Most Deprived
5 = Least Deprived"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
SIMD - AVERAGE admissions 2018 and 2019 against COVID admissions
# Scatter of the monthly admissions total against the monthly sum of the
# 2018-2019 average column, coloured by SIMD quintile.
covid_admissions_HB_simd_totals %>%
  mutate(
    # The level names belong to fct_relevel(); previously a misplaced
    # parenthesis passed them to as.character(), which ignored them, so the
    # requested level order was never actually applied.
    simd_quintile = fct_relevel(
      as.character(simd_quintile),
      "1", "2", "3", "4", "5"
    )
  ) %>%
  ggplot() +
  aes(
    x = total_admissions_per_month_simd,
    y = total_past_average_ads_per_month,
    colour = simd_quintile
  ) +
  geom_point() +
  labs(
    x = "Monthly number of admissions in COVID times",
    y = "Average monthly admissions to hospital in 2018-2019",
    title = "Monthly admissions in COVID times against the equivalent monthly
average admissions in previous years by SIMD Level",
    subtitle = "COVID: January, 2020 - February, 2022 /
Previous years: 2018 - 2019",
    colour = "SIMD Level:
1 = Most Deprived
5 = Least Deprived"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))